"""
Scripts that run the estimation of markups

@author:  Burstein, Carvalho, Grassi
"""
    
############################
# Import modules & Packages
############################
import numpy as np
import pandas as pd
from collections import OrderedDict
import matplotlib as plt
plt.rcParams['text.usetex'] = False; plt.rcParams['text.latex.unicode'] = False # this is needed on the BOX

import time as tm
from time import time

#import the class to estimate prod functions
from ProdFun import ProdFun


#######################################################################################################
#Specification Number and  Choice
######################################################################################################

## Do you want to save results?
saveresults = True
#saveresults = False

##number of the specification
specs='FSQ_baseline'


################################################################################################
# Import data
################################################################################################
t0 = time()

##Here is the path
path = 'C:/Users/Public/Documents/BCG_DGM/BCG/replication_file_jan25_package/' #you need to specify the path to have it working from the shell


## import data
dta_deep=pd.read_stata(path+'data/data_smallest_for_python_baseline.dta') 

# rename columns
dta_deep.columns =['firmid','date','sector_2d','sale','v','k','m','o','s','p','q','ms']




#%%
######################################################################################################
# Set up the specification
######################################################################################################
## Choose your output variable y
dta_deep['y'] = dta_deep['q']

##Do you want to do a first-stage
purge=True


## set how to use variables
vars_how = OrderedDict() # ordered dictionary, coefficients are ordered as the insertion order

## choose the variable inputs
vars_how['m'] = ['purge','static','variable']

## choose the other inputs
vars_how['v'] = ['purge','dynamic','fixed']
vars_how['o'] = ['purge','static','fixed']
vars_how['k'] = ['purge','dynamic','fixed']

## Choose the variables to put in the first stage
vars_how['ms'] = ['purge','no input','no input']
vars_how['p'] = ['purge','no input','no input']


## aditional controls for the initial value (OLS estimation)
init_control1 = []

##Old choice that should be discard (price as contronls in the AR process)
#AR_price=True  
AR_price=False 

## Do you want to include a second order term in the AR process?
AR_sqlag=False 

## form of the production functionlu
GMM_cons = True #do you want to estimate a constant

translog = True; tl_inter = True # Choice of Translog with interaction terms



## Do you want to compute standard errors
se1=False 

## Do you want to plot the objectif function
#plot_moments_fn1 = True
plot_moments_fn1 = False

#save_heatmap = 'Heatmap'; save_3D = '3D'
save_heatmap = ''; save_3D = ''



## Choose of the minimization algorithm.
#optimOp = 'BASINHOPPING'
#optimOp = 'FSOLVE'
optimOp = 'NM'
##Do you want to normalized the objectif function
NormMoments=True
#NormMoments=False

## Do you want to compute/save markups?
markups = True #if you want to save markup
#markups = False #if you don't want to save markup

## Do you want to do the sales correction
sales_correction=False #if you want to compute sales correction
#sales_correction=True #if you want to compute sales correction


#%%
######################################################################################################
# Fit GMM
######################################################################################################
t1 = time()

# creating the for loop: listing all sectors
ind_list = dta_deep.sector_2d.unique()
ind_list = ind_list[np.logical_not(np.isnan(ind_list))]
ind_list.sort()




for ind in ind_list:
    
    #tm.sleep(2)
    
    #some sector specific optimizer choice

    if ind==8: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    elif ind==13: 
        optimOp_ind='FSOLVE' 
        optimOp_ind_CD='FSOLVE'
        NormMoments_ind=False
    elif ind==15: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    elif ind==18: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    elif ind==23: 
        optimOp_ind='FSOLVE' 
        optimOp_ind_CD='FSOLVE'
        NormMoments_ind=True
    elif ind==25: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    elif ind==27: 
        optimOp_ind='FSOLVE' 
        optimOp_ind_CD='FSOLVE'
        NormMoments_ind=False
    elif ind==29: 
        optimOp_ind='FSOLVE' 
        optimOp_ind_CD='FSOLVE'
        NormMoments_ind=True
    elif ind==31: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    elif ind==32: 
        optimOp_ind='NM' 
        optimOp_ind_CD='NM'
        NormMoments_ind=False
    else:
        optimOp_ind=optimOp 
        NormMoments_ind=NormMoments
        optimOp_ind_CD=optimOp
        

            
    
    # keep one industry
    dta_iter = dta_deep.loc[dta_deep['sector_2d'] == ind]
    
    print('')
    print('Industry ' + str(ind) + ' / ' + str(ind_list[-1]))
    print('With ' + str( dta_iter['y'].notnull().sum()) + ' obs' )
    print('----------------')
    
    
    
    
    # estimate the model
#    translog1 = True
    
    print('')
    print('Fitting Cobb-Douglass')
    P_CD = ProdFun(dta_iter, vars_how,
                   purging = purge, 
                   M=3,
#                   keep_flag=False,
                   #beta_init = initial_values['beta_m_cd'].values,
                   translog = False,
                   tl_inter = False,
                   GMM_cons = GMM_cons,
#                   GMM_cons = False,
#                   demean=True,
                   optim=optimOp_ind_CD,
                   AR_sqlag=AR_sqlag,
                   AR_price=AR_price,
                   FE=['date'],
                   FE_demean=False,
                   init_FE=False,
                   NormMoments=NormMoments_ind,
#                   LogMoments=True,
                   se = se1,
                   markups=True,
                   sales_correction=sales_correction,
#                   plot_moments_fn = True,
                   plot_moments_fn_coeff = [1,2],
                   verbose=True
                   )

# =============================================================================

    print('--')
    print('Fitting Translog')
    P_TL = ProdFun(dta_iter, vars_how,
                   purging = purge,
                   M=3,
#                   keep_flag=False,
                   #beta_init = np.array(P_TL_rts.betas),
                  # beta_init = initial_values[[ 'betam_tl',  'betam2_tl',  'beta_mv_tl', 'beta_mo_tl', 'beta_mk_tl']].values.transpose(),
                   translog = True,
                   tl_inter = tl_inter,
                   GMM_cons = GMM_cons,
                   optim=optimOp_ind,
                   AR_sqlag=AR_sqlag,
                   AR_price=AR_price,
                   FE=['date'],
                   FE_demean=False,
                   NormMoments=NormMoments_ind,
                   se = se1,
                   markups=True,
                   sales_correction=sales_correction,
                   verbose=True
                   )
    
    
   
    
    # store betas in a different dataframe   
    cols_TL=[]
    for name in P_TL.variables:
        cols_TL.append('beta_' + name +'_tl' )
        
    cols_CD=[]
    for name in P_CD.variables:
        cols_CD.append('beta_' + name +'_cd' )
        
    cols=cols_CD + cols_TL
    
    
    
    beta_iter = pd.DataFrame([np.concatenate((P_CD.betas,P_TL.betas))], columns = cols) 
    beta_iter_CD = pd.DataFrame([P_CD.betas], columns = cols_CD) 
    beta_iter_TL = pd.DataFrame([P_TL.betas], columns = cols_TL) 
    
    
    
    beta_iter['N'] = dta_iter['p'].notnull().sum()
    beta_iter_CD['N'] = dta_iter['p'].notnull().sum()
    beta_iter_TL['N'] = dta_iter['p'].notnull().sum()
    
    beta_iter['med_CD'] = np.median(P_CD.dta_mu['mu_m'])
    beta_iter['med_TL'] = np.median(P_TL.dta_mu['mu_m'])
    beta_iter_CD['med'] = np.median(P_CD.dta_mu['mu_m'])
    beta_iter_TL['med'] = np.median(P_TL.dta_mu['mu_m'])
    
    beta_iter['iqr_CD'] = ( np.quantile(P_CD.dta_mu['mu_m'],0.75) - np.quantile(P_CD.dta_mu['mu_m'],0.25) ) / np.quantile(P_CD.dta_mu['mu_m'],0.5)
    beta_iter['iqr_TL'] = ( np.quantile(P_TL.dta_mu['mu_m'],0.75) - np.quantile(P_TL.dta_mu['mu_m'],0.25) ) / np.quantile(P_TL.dta_mu['mu_m'],0.5)
    beta_iter_CD['iqr'] = ( np.quantile(P_CD.dta_mu['mu_m'],0.75) - np.quantile(P_CD.dta_mu['mu_m'],0.25) ) / np.quantile(P_CD.dta_mu['mu_m'],0.5)
    beta_iter_TL['iqr'] =  ( np.quantile(P_TL.dta_mu['mu_m'],0.75) - np.quantile(P_TL.dta_mu['mu_m'],0.25) ) / np.quantile(P_TL.dta_mu['mu_m'],0.5)
    
    beta_iter['naf2d'] = ind
    beta_iter_CD['naf2d'] = ind
    beta_iter_TL['naf2d'] = ind
    
    beta_iter['rho_cd'] = P_CD.g_b[1,0]
    beta_iter['rho_tl'] = P_TL.g_b[1,0]
    beta_iter_CD['rho'] = P_CD.g_b[1,0]
    beta_iter_TL['rho'] = P_TL.g_b[1,0]
    

    if ind == ind_list[0]: # first iteration of for loop
        dta_epsilon = P_CD.dta_mu[['date','epsilon']]
        dta_betas = beta_iter
        dta_betas_CD = beta_iter_CD
        dta_betas_TL = beta_iter_TL
        if se1:
             dta_betas_se_CD = pd.DataFrame([P_CD.ses], columns = cols_CD)
             dta_betas_se_TL = pd.DataFrame([P_TL.ses], columns = cols_TL)
    else:
        dta_epsilon = dta_epsilon.append(P_CD.dta_mu[['date','epsilon']])
        dta_betas = dta_betas.append(beta_iter)
        dta_betas_CD = dta_betas_CD.append(beta_iter_CD)
        dta_betas_TL = dta_betas_TL.append(beta_iter_TL)
        if se1:
            dta_betas_se_CD = dta_betas_se_CD.append(pd.DataFrame([P_CD.ses], columns = cols_CD))
            dta_betas_se_TL = dta_betas_se_TL.append(pd.DataFrame([P_TL.ses], columns = cols_TL))

            
            
    #save the markups in a data set
    if markups:
        if ind == ind_list[0]: # first iteration of for loop
            dta_mu_CD = P_CD.dta_mu
            dta_mu_TL = P_TL.dta_mu
        else:
            dta_mu_CD = dta_mu_CD.append(P_CD.dta_mu)
            dta_mu_TL = dta_mu_TL.append(P_TL.dta_mu)
        
    
            

# save resulting coefficients and epsilon to stata data
if saveresults:
    dta_betas.to_stata(path+'data/coefficients_byind_Python_' + specs + '_.dta',write_index=False)
#    dta_epsilon.to_stata(path+'data/epsilons_Python.dta',write_index=False)


print('')
print('-----------------   Specs: '+ specs + '-----------------------') 
print('')
print('Resulting Betas for CD:')    
print(dta_betas_CD)
print('')
print('Resulting Betas for TL:')    
print(dta_betas_TL)


#np.savetxt(path+'dta_betas_CD.txt', dta_betas_CD, fmt='%d')
if saveresults:

    with pd.ExcelWriter(path+'results/dta_betas' + specs + '_.xlsx') as writer:
        dta_betas.to_excel(writer, sheet_name = 'dta_betas', index=False)
        dta_betas_CD.to_excel(writer, sheet_name = 'dta_betas_CD', index=False)
        dta_betas_TL.to_excel(writer, sheet_name = 'dta_betas_TL', index=False)
        if se1:
            dta_betas_se_CD.to_excel(writer, sheet_name = 'dta_betas_se_CD', index=False)
            dta_betas_se_TL.to_excel(writer, sheet_name = 'dta_betas_se_TL', index=False)




#Clean and export the markups data
if markups:
    mu_name = []
    mu_name_CD = []
    mu_name_TL = []
    for key in vars_how:
            if vars_how[key][2] == 'variable':
                mu_name.append('mu_'+ key)
                mu_name_CD.append('mu_cd_'+ key)
                mu_name_TL.append('mu_tl_'+ key)
                
    dta_mu_CD=dta_mu_CD[ ['firmid','date','phi','epsilon'] + mu_name ]
    dta_mu_TL=dta_mu_TL[ ['firmid','date','phi','epsilon'] + mu_name ]
    
    dta_mu_CD.columns = ['firmid','date','phi_cd','epsilon_cd'] + mu_name_CD
    dta_mu_TL.columns = ['firmid','date','phi_tl','epsilon_tl'] + mu_name_TL
    
    
    #dta_mu = dta_mu_CD.join(dta_mu_TL)
    
    if saveresults:
        dta_mu_CD=dta_mu_CD.replace([np.inf,-np.inf],np.nan).dropna()
        dta_mu_TL=dta_mu_TL.replace([np.inf,-np.inf],np.nan).dropna()
    
        
        dta_mu_CD.to_stata(path+'data/dta_mu_CD_' + specs + '_.dta',write_index=False)
        dta_mu_TL.to_stata(path+'data/dta_mu_TL_' + specs + '_.dta',write_index=False)
    

t2 = time()

print('')
print('')
print('Elapsed time ' + str(np.round(t2-t0,2))+'s - '+str(np.round((t2-t0)/3600,2))+'h')
print('Cleaning time ' + str(np.round(t1-t0,2)))
print('Fitting time ' + str(np.round(t2-t1,2))+'s - '+str(np.round((t2-t1)/3600,2))+'h')
print('')
print('')




